********************************************************************************
********************************************************************************
* Implements pollution regression discontinuity analysis
********************************************************************************
********************************************************************************

clear all

set maxvar 10000
set more off
capture log close
capture restore

global tmp "d:\tmp\PollutionHealth"
capture !mkdir $tmp

global pollutants "PM10 pm25"
global weather    "temp prcp"



use $tmp\pm_W_dist_county_year_month, clear
merge m:1 county_code year using $tmp\population_county_year, keep(1 3) keepusing(pop_total)
unique county_code if _merge==1		//County-year population data is missing for 85 county codes in at least one year-month (but not necessarily all year-months); as noted in the article's data availability statement, these data are proprietary and can be accessed through the Chinese Center for Disease Control and Prevention
drop _merge
save $tmp\pm_W_dist_county_year_month, replace

use $tmp\pm_W_dist_town_year_month, clear
merge m:1 town_code using $tmp\population_town_2010, keep(1 3) keepusing(pop_total)
unique town_code if _merge==1		//2010 Census population data is missing for 6596 town codes
drop _merge
save $tmp\pm_W_dist_town_year_month, replace



******************************************************************
*** Select 3-year period and spatial unit; prepares file paths ***
*** Aggregates the disaggregate year-month level data		   ***
*** No need to run lines 36-81, as $aggregateSample is published**
*** excluding county-level population data (which is proprietary)*
******************************************************************
*Select ONE
local analysisUnit "county"  
*local analysisUnit "town"  
*local analysisUnit "monitor"

*Select ONE
local yearStart 2013	
*local yearStart 2016

*Select ONE
local yearEnd   2015
*local yearEnd   2018

local pathFile1 "$tmp\pm_W_dist_"
local pathFile2 "_year_month" 
global disaggregateSample `pathFile1'`analysisUnit'`pathFile2'
global aggregateSample `analysisUnit'_`yearStart'_`yearEnd'


*Aggregate year-month data to 3-year means
use $disaggregateSample, clear

*(Monitor-level only: Activate merge command here and change collapse command below) For use in the wind direction analysis applied to monitor-level samples
*Merge in windBlowsToQH_or_calm windBlowsToAlt_or_calm
*This is the proportion of date-(3-)hour observations over monitor by year-month for which wind blows toward boundary or the wind is calm (less mixing from the other side of the boundary)
*merge m:1 monitor_code year month using $tmp\windBlowsToB_monitor_year_month, assert(2 3) keep(3) nogen

keep if inrange(year,`yearStart',`yearEnd')
*keep if month>=11 | month<=3					//Table A21: Winter heating months (as in Ito and Zhang, 2016)
*keep if month>=5 & month<=9					//Table A22: Non-winter months, May to September
*keep if month>=6 & month<=8					//Table A23: Summer months, June to August
*keep if urban==1								//Table A20: Activate for the urban vs. rural subsample analysis
*keep if urban==0

capture gen pop_total=.							//Population is not used in the monitor-level analysis, so this is a token variable
collapse (sum) numDays_pm25 numDays_temp (mean) year PM10 pm25 temp prcp dist_real dist_qinhuai_IZ heat_real heat_qinhuai_IZ urban pop_total, by(`analysisUnit'_code)
*(Monitor-level only: Activate merge command above and change collapse command here) For use in the wind direction analysis applied to monitor-level samples
*collapse (sum) numDays_pm25 numDays_temp (mean) year PM10 pm25 temp prcp dist_real dist_qinhuai_IZ heat_real heat_qinhuai_IZ urban windBlowsToQH_or_calm windBlowsToAlt_or_calm, by(`analysisUnit'_code)

save $aggregateSample, replace



*******************************************************************
*** Replicate Tables 2-3 and A2-A23                    		   	***
*******************************************************************
*Again, locally select 3-year period and spatial unit

*Select ONE
local analysisUnit "county"  
*local analysisUnit "town"  
*local analysisUnit "monitor"

*Select ONE
local yearStart 2013	
*local yearStart 2016

*Select ONE
local yearEnd   2015
*local yearEnd   2018

global aggregateSample `analysisUnit'_`yearStart'_`yearEnd'

use $aggregateSample, clear

		
***Qin-Huai boundary***															//Table A2-A3, A6-A7, A10-A11, and so on

sum year dist_qinhuai_IZ $pollutants $weather if heat_qinhuai_IZ==1				//Table A1
sum year dist_qinhuai_IZ $pollutants $weather if heat_qinhuai_IZ==0

*Running variable
gen     runvar =  dist_qinhuai_IZ if heat_qinhuai_IZ==1
replace runvar = -dist_qinhuai_IZ if heat_qinhuai_IZ==0
*Quadratic
gen     runvar_2 = runvar * runvar
replace runvar_2 = -runvar_2      if heat_qinhuai_IZ==0
*Cubic
gen runvar_3 = runvar * runvar * runvar
*Interaction
gen north_dist1 = heat_qinhuai_IZ * runvar
gen north_dist2 = heat_qinhuai_IZ * runvar_2 
gen north_dist3 = heat_qinhuai_IZ * runvar_3

*Column 1
foreach y of global pollutants {	
	reg `y' heat_qinhuai_IZ runvar north_dist1, robust
	sum `y' if e(sample)
	}

*Column 2
foreach y of global pollutants {
	reg `y' heat_qinhuai_IZ runvar north_dist1 $weather, robust
	}

*Column 3
foreach y of global pollutants {
	reg `y' heat_qinhuai_IZ runvar runvar_2 runvar_3 north_dist1 north_dist2 north_dist3, robust
	}

*Column 4
foreach y of global pollutants {	
	reg `y' heat_qinhuai_IZ runvar runvar_2 runvar_3 north_dist1 north_dist2 north_dist3 $weather, robust
	}

*Column 5
foreach y of global pollutants {	
	reg `y' heat_qinhuai_IZ runvar runvar_2 runvar_3 north_dist1 north_dist2 north_dist3 $weather if dist_qinhuai_IZ<=500, robust
	sum `y' if e(sample)
	}
	
*Column 6
foreach y of global pollutants {
	rdrobust `y' runvar, c(0) p(1) kernel(tri)
	}

*Column 7
foreach y of global pollutants {
	rdrobust `y' runvar, c(0) p(1) kernel(tri) covs($weather)
	*rdrobust `y' runvar, c(0) p(1) kernel(tri) covs($weather windBlowsToQH_or_calm)				//Table A15: For use in the wind direction analysis applied to monitor-level samples
	*rdrobust `y' runvar if windBlowsToQH_or_calm>=.4841187, c(0) p(1) kernel(tri) covs($weather)	//Table A15: For use in the wind direction analysis applied to monitor-level samples; .4841187 is p25
	*keep if pop_total~=.																			//Apply for population weights
	*rdrobust `y' runvar, c(0) p(1) kernel(tri) covs($weather) weights(pop_total)					//Table A18: Population weights multiply the kernel function
	}

/*
*Different kernel: Epanechnikov, uniform 
foreach y of global pollutants {
	rdrobust `y' runvar, c(0) p(1) kernel(epa) covs($weather)
	}
foreach y of global pollutants {
	rdrobust `y' runvar, c(0) p(1) kernel(uni) covs($weather)
	}
*/

*Column 8
foreach y of global pollutants {
	rdrobust `y' runvar, c(0) p(1) kernel(tri) h(100) covs($weather)
	}

*Column 9
foreach y of global pollutants {
	rdrobust `y' runvar if dist_qinhuai_IZ<=500, c(0) p(1) kernel(tri) covs($weather)
	}
	
	
***Alternative boundary***														//Table A4-A5, A8-A9, A12-A13, and so on

sum year dist_real $pollutants $weather if heat_real==1							//Table A1
sum year dist_real $pollutants $weather if heat_real==0

capture drop runvar runvar_2 runvar_3 north_dist1 north_dist2 north_dist3
*Running variable
gen     runvar =  dist_real  if heat_real==1
replace runvar = -dist_real  if heat_real==0
*Quadratic
gen     runvar_2 = runvar * runvar
replace runvar_2 = -runvar_2 if heat_real==0
*Cubic
gen runvar_3 = runvar * runvar * runvar
*Interaction
gen north_dist1 = heat_real * runvar
gen north_dist2 = heat_real * runvar_2 
gen north_dist3 = heat_real * runvar_3	

*Column 1				//Note: To replicate Table A14, add "if dist_real>X" directly in regression commands
foreach y of global pollutants {	
	reg `y' heat_real runvar north_dist1, robust
	sum `y' if e(sample)
	}

*Column 2
foreach y of global pollutants {
	reg `y' heat_real runvar north_dist1 $weather, robust
	}

*Column 3
foreach y of global pollutants {
	reg `y' heat_real runvar runvar_2 runvar_3 north_dist1 north_dist2 north_dist3, robust
	}

*Column 4
foreach y of global pollutants {	
	reg `y' heat_real runvar runvar_2 runvar_3 north_dist1 north_dist2 north_dist3 $weather, robust
	}

*Column 5
foreach y of global pollutants {	
	reg `y' heat_real runvar runvar_2 runvar_3 north_dist1 north_dist2 north_dist3 $weather if dist_real<=500, robust
	sum `y' if e(sample)
	}
	
*Column 6
foreach y of global pollutants {
	rdrobust `y' runvar, c(0) p(1) kernel(tri)
	}

*Column 7
foreach y of global pollutants {
	rdrobust `y' runvar, c(0) p(1) kernel(tri) covs($weather)
	*rdrobust `y' runvar, c(0) p(1) kernel(tri) covs($weather windBlowsToAlt_or_calm)				//Table A15: For use in the wind direction analysis applied to monitor-level samples
	*rdrobust `y' runvar if windBlowsToAlt_or_calm>=.4768546, c(0) p(1) kernel(tri) covs($weather)	//Table A15: For use in the wind direction analysis applied to monitor-level samples; .4768546 is p25
	*keep if pop_total~=.																			//Apply for population weights
	*rdrobust `y' runvar, c(0) p(1) kernel(tri) covs($weather) weights(pop_total)					//Table A19: Population weights multiply the kernel function
	}
	
/*
*Different kernel: Epanechnikov, uniform 
foreach y of global pollutants {
	rdrobust `y' runvar, c(0) p(1) kernel(epa) covs($weather)
	}
foreach y of global pollutants {
	rdrobust `y' runvar, c(0) p(1) kernel(uni) covs($weather)
	}
*/

*Column 8
foreach y of global pollutants {
	rdrobust `y' runvar, c(0) p(1) kernel(tri) h(100) covs($weather)
	}

*Column 9
foreach y of global pollutants {
	rdrobust `y' runvar if dist_real<=500, c(0) p(1) kernel(tri) covs($weather)
	}
	
	
	
*******************************************************************
*** Replicate Plots and Descriptive Statistics         		   	***
*******************************************************************
use county_2013_2015, clear														//Table A1
sum $pollutants $weather pop_total
bysort heat_qinhuai_IZ: sum dist_qinhuai_IZ
bysort heat_real:       sum dist_real

use town_2013_2015, clear
sum
bysort heat_qinhuai_IZ: sum dist_qinhuai_IZ
bysort heat_real:       sum dist_real

use monitor_2013_2015, clear
sum $pollutants
bysort heat_qinhuai_IZ: sum dist_qinhuai_IZ
bysort heat_real:       sum dist_real

use county_2016_2018, clear
sum $pollutants $weather pop_total
bysort heat_qinhuai_IZ: sum dist_qinhuai_IZ
bysort heat_real:       sum dist_real

use town_2016_2018, clear
sum
bysort heat_qinhuai_IZ: sum dist_qinhuai_IZ
bysort heat_real:       sum dist_real

use monitor_2016_2018, clear
sum $pollutants
bysort heat_qinhuai_IZ: sum dist_qinhuai_IZ
bysort heat_real:       sum dist_real


*Locally select 3-year period (unit of analysis is pollution monitor)

local analysisUnit "monitor"

*Select ONE
local yearStart 2013	
*local yearStart 2016

*Select ONE
local yearEnd   2015
*local yearEnd   2018

global aggregateSample `analysisUnit'_`yearStart'_`yearEnd'

use $aggregateSample, clear


***Qin-Huai boundary***
gen runvar     =  dist_qinhuai_IZ if heat_qinhuai_IZ == 1
replace runvar = -dist_qinhuai_IZ if heat_qinhuai_IZ == 0

local maxdist=300																//Fig. A3
local halfmaxdist=`maxdist'/2
twoway lfitci PM10  runvar if runvar>=-`maxdist'&runvar<0 || lfitci PM10 runvar if runvar>=0&runvar<=`maxdist' ///
	|| scatter PM10 runvar if runvar>=-`maxdist'&runvar<=`maxdist', msize(vsmall) mcolor(olive) mstyle(p1) /// 
	legend(off) xlabel(-`maxdist' -`halfmaxdist' 0 `halfmaxdist' `maxdist', labsize(small) angle(0)) xline(0) ///
	ylabel(, labsize(small) angle(0)) ytitle("Mean PM10 ({&mu}g/m{superscript:3})", size(medsmall)) ///
	xtitle("Shortest distance to the boundary (km, positive if on north side)", size(medsmall)) title("Monitor-level PM10 against distance to the Qin-Huai boundary", size(medsmall) color(black)) scheme(s1manual)	

local maxdist=300
local halfmaxdist=`maxdist'/2
twoway lfitci pm25  runvar if runvar>=-`maxdist'&runvar<0 || lfitci pm25 runvar if runvar>=0&runvar<=`maxdist' ///
	|| scatter pm25 runvar if runvar>=-`maxdist'&runvar<=`maxdist', msize(vsmall) mcolor(blue) mstyle(p1) /// 
	legend(off) xlabel(-`maxdist' -`halfmaxdist' 0 `halfmaxdist' `maxdist', labsize(small) angle(0)) xline(0) ///
	ylabel(0(50)150, labsize(small) angle(0)) ytitle("Mean PM2.5 ({&mu}g/m{superscript:3})", size(medsmall)) ///
	xtitle("Shortest distance to the boundary (km, positive if on north side)", size(medsmall)) title("Monitor-level PM2.5 against distance to the Qin-Huai boundary", size(medsmall) color(black)) scheme(s1manual)	

	
***Alternative boundary***
capture drop runvar																//Fig. A4
gen runvar     =  dist_real if heat_real == 1
replace runvar = -dist_real if heat_real == 0

local maxdist=300
local halfmaxdist=`maxdist'/2
twoway lfitci PM10  runvar if runvar>=-`maxdist'&runvar<0 || lfitci PM10 runvar if runvar>=0&runvar<=`maxdist' ///
	|| scatter PM10 runvar if runvar>=-`maxdist'&runvar<=`maxdist', msize(vsmall) mcolor(olive) mstyle(p1) /// 
	legend(off) xlabel(-`maxdist' -`halfmaxdist' 0 `halfmaxdist' `maxdist', labsize(small) angle(0)) xline(0) ///
	ylabel(0(50)200, labsize(small) angle(0)) ytitle("Mean PM10 ({&mu}g/m{superscript:3})", size(medsmall)) ///
	xtitle("Shortest distance to the boundary (km, positive if on north side)", size(medsmall)) title("Monitor-level PM10 against distance to alternative boundary", size(medsmall) color(black)) scheme(s1manual)	

local maxdist=300
local halfmaxdist=`maxdist'/2
twoway lfitci pm25  runvar if runvar>=-`maxdist'&runvar<0 || lfitci pm25 runvar if runvar>=0&runvar<=`maxdist' ///
	|| scatter pm25 runvar if runvar>=-`maxdist'&runvar<=`maxdist', msize(vsmall) mcolor(blue) mstyle(p1) /// 
	legend(off) xlabel(-`maxdist' -`halfmaxdist' 0 `halfmaxdist' `maxdist', labsize(small) angle(0)) xline(0) ///
	ylabel(20(20)120, labsize(small) angle(0)) ytitle("Mean PM2.5 ({&mu}g/m{superscript:3})", size(medsmall)) ///
	xtitle("Shortest distance to the boundary (km, positive if on north side)", size(medsmall)) title("Monitor-level PM2.5 against distance to alternative boundary", size(medsmall) color(black)) scheme(s1manual)	

	
*Locally select 3-year period (unit of analysis is county)

local analysisUnit "county"  

*Select ONE
local yearStart 2013	
*local yearStart 2016

*Select ONE
local yearEnd   2015
*local yearEnd   2018

global aggregateSample `analysisUnit'_`yearStart'_`yearEnd'

use $aggregateSample, clear


***Qin-Huai boundary***
gen runvar     =  dist_qinhuai_IZ if heat_qinhuai_IZ == 1
replace runvar = -dist_qinhuai_IZ if heat_qinhuai_IZ == 0

local maxdist=300																//Fig. A5
local halfmaxdist=`maxdist'/2
twoway lfitci temp runvar if runvar>=-`maxdist'&runvar<0 || lfitci temp runvar if runvar>=0&runvar<=`maxdist' ///
	|| scatter temp runvar if runvar>=-`maxdist'&runvar<=`maxdist', msize(vsmall) mcolor(red) mstyle(p1) /// 
	legend(off) xlabel(-`maxdist' -`halfmaxdist' 0 `halfmaxdist' `maxdist', labsize(small) angle(0)) xline(0) ///
	ylabel(, labsize(small) angle(0)) ytitle("Mean temperature (°C)", size(medsmall)) ///
	xtitle("Shortest distance to the boundary (km, positive if on north side)", size(medsmall)) title("Temperature against distance to the Qin-Huai boundary", size(medsmall) color(black)) scheme(s1manual)	
	
local maxdist=300
local halfmaxdist=`maxdist'/2
twoway lfitci prcp  runvar if runvar>=-`maxdist'&runvar<0 || lfitci prcp runvar if runvar>=0&runvar<=`maxdist' ///
	|| scatter prcp runvar if runvar>=-`maxdist'&runvar<=`maxdist', msize(vsmall) mcolor(black) mstyle(p1) /// 
	legend(off) xlabel(-`maxdist' -`halfmaxdist' 0 `halfmaxdist' `maxdist', labsize(small) angle(0)) xline(0) ///
	ylabel(, labsize(small) angle(0)) ytitle("Mean precipitation (inches/day)", size(medsmall)) ///
	xtitle("Shortest distance to the boundary (km, positive if on north side)", size(medsmall)) title("Precipitation against distance to the Qin-Huai boundary", size(medsmall) color(black)) scheme(s1manual)	


***Alternative boundary***
capture drop runvar																//Fig. A6
gen runvar     =  dist_real if heat_real == 1
replace runvar = -dist_real if heat_real == 0

local maxdist=300
local halfmaxdist=`maxdist'/2
twoway lfitci temp runvar if runvar>=-`maxdist'&runvar<0 || lfitci temp runvar if runvar>=0&runvar<=`maxdist' ///
	|| scatter temp runvar if runvar>=-`maxdist'&runvar<=`maxdist', msize(vsmall) mcolor(red) mstyle(p1) /// 
	legend(off) xlabel(-`maxdist' -`halfmaxdist' 0 `halfmaxdist' `maxdist', labsize(small) angle(0)) xline(0) ///
	ylabel(, labsize(small) angle(0)) ytitle("Mean temperature (°C)", size(medsmall)) ///
	xtitle("Shortest distance to the boundary (km, positive if on north side)", size(medsmall)) title("Temperature against distance to alternative boundary", size(medsmall) color(black)) scheme(s1manual)	
	
local maxdist=300
local halfmaxdist=`maxdist'/2
twoway lfitci prcp  runvar if runvar>=-`maxdist'&runvar<0 || lfitci prcp runvar if runvar>=0&runvar<=`maxdist' ///
	|| scatter prcp runvar if runvar>=-`maxdist'&runvar<=`maxdist', msize(vsmall) mcolor(black) mstyle(p1) /// 
	legend(off) xlabel(-`maxdist' -`halfmaxdist' 0 `halfmaxdist' `maxdist', labsize(small) angle(0)) xline(0) ///
	ylabel(, labsize(small) angle(0)) ytitle("Mean precipitation (inches/day)", size(medsmall)) ///
	xtitle("Shortest distance to the boundary (km, positive if on north side)", size(medsmall)) title("Precipitation against distance to alternative boundary", size(medsmall) color(black)) scheme(s1manual)	


	
******************************************************************
*** Figs. 1, A1, 2, A9, 3                                      ***
******************************************************************
*Fig. 1

*Plot at county level															//Fig. 1, panel A
local analysisUnit "county"  
local pathFile1 "$tmp\pm_W_dist_"
local pathFile2 "_year_month" 
global disaggregateSample `pathFile1'`analysisUnit'`pathFile2'
use $disaggregateSample, clear

merge m:1 county_code using county_code_coordinates, assert(2 3) keep(3) nogen	//Many county-year-month observations of environmental conditions merged to one county observation of latitude and longitude
bysort county_code: keep if _n==1
sum county_code
twoway scatter county_lat county_lng, msize(tiny) mcolor(black) mstyle(p1) /// 
	legend(off) xlabel(, labsize(small) angle(0)) xline(0) ///
	ylabel(, labsize(small) angle(0)) ytitle("County centroid latitude (degrees)", size(medsmall)) ///
	xtitle("County centroid longitude (degrees)", size(medsmall)) title("Coordinates of county centroids in our estimation sample", size(medsmall) color(black)) scheme(s1manual)		

*Plot at town level																//Fig. 1, panel B
local analysisUnit "town"  
local pathFile1 "$tmp\pm_W_dist_"
local pathFile2 "_year_month" 
global disaggregateSample `pathFile1'`analysisUnit'`pathFile2'
use $disaggregateSample, clear

merge m:1 town_code using town_code_coordinates, assert(2 3) keep(3) nogen	//Many county-year-month observations of environmental conditions merged to one county observation of latitude and longitude
bysort town_code: keep if _n==1
sum town_code
twoway scatter town_lat town_lng, msize(vtiny) mcolor(black) mstyle(p1) /// 
	legend(off) xlabel(, labsize(small) angle(0)) xline(0) ///
	ylabel(, labsize(small) angle(0)) ytitle("Town centroid latitude (degrees)", size(medsmall)) ///
	xtitle("Town centroid longitude (degrees)", size(medsmall)) title("Coordinates of town centroids in our estimation sample", size(medsmall) color(black)) scheme(s1manual)		

*Plot at monitor level															//Fig. 1, panel C
local analysisUnit "monitor"  
local pathFile1 "$tmp\pm_W_dist_"
local pathFile2 "_year_month" 
global disaggregateSample `pathFile1'`analysisUnit'`pathFile2'
use $disaggregateSample, clear

bysort monitor_code: keep if _n==1
unique monitor_code
twoway scatter StationLat StationLon, msize(tiny) mcolor(black) mstyle(p1) /// 
	legend(off) xlabel(, labsize(small) angle(0)) xline(0) ///
	ylabel(, labsize(small) angle(0)) ytitle("PM2.5 monitor latitude (degrees)", size(medsmall)) ///
	xtitle("PM2.5 monitor longitude (degrees)", size(medsmall)) title("Coordinates of PM2.5 monitors in our estimation sample", size(medsmall) color(black)) scheme(s1manual)		
	
*Plot at weather station level													//Fig. 1, panel D
*Omit one southernmost station to maintain the axes ranges of the other plots)
use $tmp\W_station_coordinates, clear
sum usaf
twoway scatter usaf_lat usaf_lon if usaf_lat>15, msize(tiny) mcolor(black) mstyle(p1) /// 
	legend(off) xlabel(, labsize(small) angle(0)) xline(0) ///
	ylabel(20(10)50, labsize(small) angle(0)) ytitle("Weather station latitude (degrees)", size(medsmall)) ///
	xtitle("Weather station longitude (degrees)", size(medsmall)) title("Coordinates of weather stations used in constructing our sample", size(medsmall) color(black)) scheme(s1manual)	

	
*Figures showing monitors on either side of the boundary, and monitors for which wind blowing from across the other side of the policy boundary is most prevalent 
*Plot at monitor level
local analysisUnit "monitor"  
local pathFile1 "$tmp\pm_W_dist_"
local pathFile2 "_year_month" 
global disaggregateSample `pathFile1'`analysisUnit'`pathFile2'
use $disaggregateSample, clear

*For use in the wind direction analysis applied to monitor-level samples: Merge in windBlowsToQH_or_calm windBlowsToAlt_or_calm
*This is the proportion of date-(3-)hour observations over monitor by year-month for which wind blows toward boundary or the wind is calm (less mixing from the other side of the boundary)
merge m:1 monitor_code year month using windBlowsToB_monitor_year_month, assert(2 3) keep(3) nogen
*(for verification only) Use the 2016-2018 period to illustrate	
preserve
	keep if inrange(year,2016,2018)
	collapse (mean) StationLon StationLat dist_real dist_qinhuai_IZ heat_real heat_qinhuai_IZ windBlowsToQH_or_calm windBlowsToAlt_or_calm, by(monitor_code)

*(One-quarter of) Monitors for which wind blowing from across the other side of the policy boundary is most prevalent (2016-2018 illustration; Qin-Huai, then alternative boundary)
twoway scatter StationLat StationLon if windBlowsToQH_or_calm<.4841187, msize(tiny) mcolor(black) mstyle(p1) /// 
	legend(off) xlabel(80(10)130, labsize(small) angle(0)) xline(0) ///
	ylabel(20(10)50, labsize(small) angle(0)) ytitle("PM2.5 monitor latitude (degrees)", size(medsmall)) ///
	xtitle("PM2.5 monitor longitude (degrees)", size(medsmall)) title("PM2.5 monitors with prevailing wind from the other side (2016-18)", size(medsmall) color(black)) scheme(s1manual)	
twoway scatter StationLat StationLon if windBlowsToAlt_or_calm<.4768546, msize(tiny) mcolor(black) mstyle(p1) /// 
	legend(off) xlabel(80(10)130, labsize(small) angle(0)) xline(0) ///
	ylabel(20(10)50, labsize(small) angle(0)) ytitle("PM2.5 monitor latitude (degrees)", size(medsmall)) ///
	xtitle("PM2.5 monitor longitude (degrees)", size(medsmall)) title("PM2.5 monitors with prevailing wind from the other side (2016-18)", size(medsmall) color(black)) scheme(s1manual)	
restore


*Fig. A1
*Plot at monitor level
local analysisUnit "monitor"  
local pathFile1 "$tmp\pm_W_dist_"
local pathFile2 "_year_month" 
global disaggregateSample `pathFile1'`analysisUnit'`pathFile2'
use $disaggregateSample, clear

collapse (mean) StationLon StationLat dist_real dist_qinhuai_IZ heat_real heat_qinhuai_IZ, by(monitor_code)
preserve
	append using QinHuaiBoundary_coordinates_every500m
	twoway scatter StationLat StationLon if heat_qinhuai_IZ==1, msize(tiny) mcolor(black) mstyle(p1) || scatter lat_84 lng_84, msize(tiny) mcolor(mint) mstyle(p1) /// 
		legend(off) xlabel(80(10)130, labsize(small) angle(0) format(%5.0f)) ///
		ylabel(20(10)50, labsize(small) angle(0) format(%5.0f)) ytitle("Latitude (degrees)", size(medsmall)) ///
		xtitle("Longitude (degrees)", size(medsmall)) title("PM2.5 monitors on the north side of the Qin-Huai boundary", size(medsmall) color(black)) scheme(s1manual)	
	twoway scatter StationLat StationLon if heat_qinhuai_IZ==0, msize(tiny) mcolor(black) mstyle(p1) || scatter lat_84 lng_84, msize(tiny) mcolor(mint) mstyle(p1) /// 
		legend(off) xlabel(80(10)130, labsize(small) angle(0) format(%5.0f)) ///
		ylabel(20(10)50, labsize(small) angle(0) format(%5.0f)) ytitle("Latitude (degrees)", size(medsmall)) ///
		xtitle("Longitude (degrees)", size(medsmall)) title("PM2.5 monitors on the south side of the Qin-Huai boundary", size(medsmall) color(black)) scheme(s1manual)	
restore
preserve
	append using AlternativeBoundary_coordinates_every500m
	twoway scatter StationLat StationLon if heat_real==1, msize(tiny) mcolor(black) mstyle(p1) || scatter lat_84 lng_84, msize(tiny) mcolor(red) mstyle(p1) /// 
		legend(off) xlabel(80(10)130, labsize(small) angle(0) format(%5.0f)) ///
		ylabel(20(10)50, labsize(small) angle(0) format(%5.0f)) ytitle("Latitude (degrees)", size(medsmall)) ///
		xtitle("Longitude (degrees)", size(medsmall)) title("PM2.5 monitors on the north side of the alternative boundary", size(medsmall) color(black)) scheme(s1manual)	
	twoway scatter StationLat StationLon if heat_real==0, msize(tiny) mcolor(black) mstyle(p1) || scatter lat_84 lng_84, msize(tiny) mcolor(red) mstyle(p1) /// 
		legend(off) xlabel(80(10)130, labsize(small) angle(0) format(%5.0f)) ///
		ylabel(20(10)50, labsize(small) angle(0) format(%5.0f)) ytitle("Latitude (degrees)", size(medsmall)) ///
		xtitle("Longitude (degrees)", size(medsmall)) title("PM2.5 monitors on the south side of the alternative boundary", size(medsmall) color(black)) scheme(s1manual)	
restore	
	
	
*Fig. 2
use QinHuaiBoundary_coordinates_every500m, clear								//Fig. 2, panel A
assert ID==_n
tab Shape_Le_1					//3 values are suspect (out of 15,093 midpoints); interpretation: sum of 15,093 segments = 7,547 km
twoway scatter lat_84 lng_84, msize(tiny) mcolor(mint) mstyle(p1) /// 
	legend(off) xlabel(80(10)130, labsize(small) angle(0) format(%5.0f)) ///
	ylabel(20(10)50, labsize(small) angle(0) format(%5.0f)) ytitle("Latitude (degrees) of boundary segment midpoint", size(medsmall)) ///
	xtitle("Longitude (degrees) of boundary segment midpoint", size(medsmall)) title("Coordinates of Qin-Huai boundary segment midpoint (500m apart)", size(medsmall) color(black)) scheme(s1manual)

use AlternativeBoundary_coordinates_every500m, clear							//Fig. 2, panel B
assert ID==_n
tab Shape_Leng					//12,081 midpoints: sum of 12,081 segments = 6,041 km
twoway scatter lat_84 lng_84, msize(tiny) mcolor(red) mstyle(p1) /// 
	legend(off) xlabel(80(10)130, labsize(small) angle(0) format(%5.0f)) ///
	ylabel(20(10)50, labsize(small) angle(0) format(%5.0f)) ytitle("Latitude (degrees) of boundary segment midpoint", size(medsmall)) ///
	xtitle("Longitude (degrees) of boundary segment midpoint", size(medsmall)) title("Coordinates of alternative boundary segment midpoint (500m apart)", size(medsmall) color(black)) scheme(s1manual)


*Fig. A9	
use county_2013_2015, clear														//Fig. A9, panel A
replace pop_total=pop_total/10^5
sum pop_total, detail
capture drop parameter density_pop_total
gen parameter = 0.1*_n in 1/800
label var parameter "County population (10{superscript:5}, mean 2013-2015)"
kdensity pop_total, k(gau) at(parameter) gen(density_param)
label var density_param "Density over counties"
line density_param parameter, xlab(0.1 0.2 0.3 0.5 1 2 3 5 10 20 30 50) xscale(log) lwidth(medthick) scheme(s1manual)

use town_2013_2015, clear														//Fig. A9, panel B
replace pop_total=pop_total/10^5
sum pop_total, detail
capture drop parameter density_pop_total
gen parameter = 0.01*_n in 1/700
label var parameter "Town population (10{superscript:5}, 2010)"
kdensity pop_total, k(gau) at(parameter) gen(density_param)
label var density_param "Density over towns"
line density_param parameter, xlab(0.01 0.02 0.03 0.05 0.1 0.2 0.3 0.5 1 2 3 5) xscale(log) lwidth(medthick) scheme(s1manual)


*Fig. 4: See the figure caption for the source of the estimates 
matrix pm10_pe_se = (17.31,5.48)\(7.61,4.44)\(37.00,3.63)\(24.92,3.51)\(51.86,6.20)\(40.34,4.35)	//Fig. 3, panel A
matrix pm10_ci = J(3,6,0)
matrix pm10_ci[1,1] = pm10_pe_se[1..6,1]'
matrix pm10_ci[2,1] = pm10_pe_se[1..6,1]'-invnormal(.975)*pm10_pe_se[1..6,2]'
matrix pm10_ci[3,1] = pm10_pe_se[1..6,1]'+invnormal(.975)*pm10_pe_se[1..6,2]'
matrix colnames pm10_ci = "Qin-Huai 2013-5" "Qin-Huai 2016-8" "Alternative 2013-5" "Alternative 2016-8" "Alt. 2013-5 winter" "Alt. 2016-8 winter"
coefplot matrix(pm10_ci[1,.]), ci((pm10_ci[2,.] pm10_ci[3,.])) ciopts(lcolor(olive) recast(rcap pcarrow)) xlabel(, angle(35)) ylabel(0(10)60, angle(hor) nogrid) vertical msymbol(square) mcolor(olive) ///
	msize(medium) ytitle("Jump at the boundary ({&mu}g/m{superscript:3})", size(medsmall)) title("Monitor-level PM10 samples", size(medium) color(black)) graphregion(color(white)) bgcolor(white)

matrix pm25_pe_se = (14.27,2.82)\(3.43,2.66)\(16.14,2.03)\(7.16,2.23)\(24.64,3.82)\(16.78,3.42)		//Fig. 3, panel B
matrix pm25_ci = J(3,6,0)
matrix pm25_ci[1,1] = pm25_pe_se[1..6,1]'
matrix pm25_ci[2,1] = pm25_pe_se[1..6,1]'-invnormal(.975)*pm25_pe_se[1..6,2]'
matrix pm25_ci[3,1] = pm25_pe_se[1..6,1]'+invnormal(.975)*pm25_pe_se[1..6,2]'
matrix colnames pm25_ci = "Qin-Huai 2013-5" "Qin-Huai 2016-8" "Alternative 2013-5" "Alternative 2016-8" "Alt. 2013-5 winter" "Alt. 2016-8 winter"
coefplot matrix(pm25_ci[1,.]), ci((pm25_ci[2,.] pm25_ci[3,.])) ciopts(lcolor(blue) recast(rcap pcarrow)) xlabel(, angle(35)) ylabel(0(5)30, angle(hor) nogrid) vertical msymbol(square) mcolor(blue)  ///
	msize(medium) ytitle("Jump at the boundary ({&mu}g/m{superscript:3})", size(medsmall)) title("Monitor-level PM2.5 samples", size(medium) color(black)) graphregion(color(white)) bgcolor(white)
	
matrix pe_se = (-.15,.08)\(.05,.05)\(.14,.05)\(.00,.03)\(.05,.06)\(.01,.04)							//Fig. 3, panel C
matrix ci_logpoint = J(3,6,0)
matrix ones = J(3,6,1)
matrix ci_logpoint[1,1] = pe_se[1..6,1]'
matrix ci_logpoint[2,1] = pe_se[1..6,1]'-invnormal(.975)*pe_se[1..6,2]'
matrix ci_logpoint[3,1] = pe_se[1..6,1]'+invnormal(.975)*pe_se[1..6,2]'
mata : st_matrix("ci_percent", exp(st_matrix("ci_logpoint")))		//Go to Mata to take the exponential element-by-element: change in log points to percent
matrix ci_percent = 100*(ci_percent-ones)
matrix colnames ci_percent = "Qin-Huai 2013-5" "Qin-Huai 2016-8" "Alternative 2013-5" "Alternative 2016-8" "Alt. 2013-5 over 65y" "Alt. 2016-8 over 65y"
coefplot matrix(ci_percent[1,.]), ci((ci_percent[2,.] ci_percent[3,.])) ciopts(lcolor(cranberry) recast(rcap pcarrow)) xlabel(, angle(35)) yline(0, lcolor(black)) ylabel(-30(15)30, angle(hor) nogrid) vertical msymbol(square) mcolor(cranberry) ///
	msize(medium) ytitle("Jump at the boundary (% increase in mortality)", size(medsmall)) title("County-level respiratory/cardiovascular mortality samples", size(medium) color(black)) graphregion(color(white)) bgcolor(white)

matrix pe_se = (.12,.09)\(.27,.09)\(.28,.11)\(.13,.09)\(.23,.12)\(.20,.09)							//Fig. 3, panel D
matrix ci_logpoint = J(3,6,0)
matrix ones = J(3,6,1)
matrix ci_logpoint[1,1] = pe_se[1..6,1]'
matrix ci_logpoint[2,1] = pe_se[1..6,1]'-invnormal(.975)*pe_se[1..6,2]'
matrix ci_logpoint[3,1] = pe_se[1..6,1]'+invnormal(.975)*pe_se[1..6,2]'
mata : st_matrix("ci_percent", exp(st_matrix("ci_logpoint")))		//Go to Mata to take the exponential element-by-element: change in log points to percent
matrix ci_percent = 100*(ci_percent-ones)
matrix colnames ci_percent = "Qin-Huai 2013-5" "Qin-Huai 2016-8" "Alternative 2013-5" "Alternative 2016-8" "Alt. 2013-5 over 65y" "Alt. 2016-8 over 65y"
coefplot matrix(ci_percent[1,.]), ci((ci_percent[2,.] ci_percent[3,.])) ciopts(lcolor(cranberry) recast(rcap pcarrow)) xlabel(, angle(35)) yline(0, lcolor(black)) ylabel(, angle(hor) nogrid) vertical msymbol(square) mcolor(cranberry) ///
	msize(medium) ytitle("Jump at the boundary (% increase in mortality)", size(medsmall)) title("County-level lung-cancer mortality samples", size(medium) color(black)) graphregion(color(white)) bgcolor(white)
	
	
	
******************************************************************
*** Figs. 4, 5, 6                                              ***
******************************************************************

*Fig. 4
*Select ONE
use monitor_2013_2015, clear
*use monitor_2016_2018, clear

*Generate distance bins, every 20 km
gen bin = .
replace bin = 1 if dist_real>=0 & dist_real<20 & heat_real == 1
replace bin = 2 if dist_real>=20 & dist_real<40 & heat_real == 1
replace bin = 3 if dist_real>=40 & dist_real<60 & heat_real == 1
replace bin = 4 if dist_real>=60  & dist_real<80 & heat_real == 1
replace bin = 5 if dist_real>=80  & dist_real<100 & heat_real == 1
replace bin = 6 if dist_real>=100  & dist_real<120 & heat_real == 1
replace bin = 7 if dist_real>=120  & dist_real<140 & heat_real == 1
replace bin = 8 if dist_real>=140  & dist_real<160 & heat_real == 1
replace bin = 9 if dist_real>=160  & dist_real<180 & heat_real == 1
replace bin = 10 if dist_real>=180  & dist_real<200 & heat_real == 1
replace bin = 11 if dist_real>=200  & dist_real<220 & heat_real == 1
replace bin = 12 if dist_real>=220  & dist_real<240 & heat_real == 1
replace bin = 13 if dist_real>=240  & dist_real<260 & heat_real == 1
replace bin = 14 if dist_real>=260  & dist_real<280 & heat_real == 1
replace bin = 15 if dist_real>=280  & dist_real<300 & heat_real == 1
replace bin = -1 if dist_real>0 & dist_real<20 & heat_real == 0
replace bin = -2 if dist_real>=20 & dist_real<40 & heat_real == 0
replace bin = -3 if dist_real>=40 & dist_real<60 & heat_real == 0
replace bin = -4 if dist_real>=60  & dist_real<80 & heat_real == 0
replace bin = -5 if dist_real>=80  & dist_real<100 & heat_real == 0
replace bin = -6 if dist_real>=100  & dist_real<120 & heat_real == 0
replace bin = -7 if dist_real>=120  & dist_real<140 & heat_real == 0
replace bin = -8 if dist_real>=140  & dist_real<160 & heat_real == 0
replace bin = -9 if dist_real>=160  & dist_real<180 & heat_real == 0
replace bin = -10 if dist_real>=180  & dist_real<200 & heat_real == 0
replace bin = -11 if dist_real>=200  & dist_real<220 & heat_real == 0
replace bin = -12 if dist_real>=220  & dist_real<240 & heat_real == 0
replace bin = -13 if dist_real>=240  & dist_real<260 & heat_real == 0
replace bin = -14 if dist_real>=260  & dist_real<280 & heat_real == 0
replace bin = -15 if dist_real>=280  & dist_real<300 & heat_real == 0

tab bin

tempfile t1
save `t1', replace

*Collapse to distance bin 
collapse (mean) PM10 pm25 heat_real, by(bin)
tsset bin
tsfill
drop if bin==0 
drop if bin==.
gen extra=1 

gen dist=.
replace dist = -290 if bin == -15
replace dist = -270 if bin == -14
replace dist = -250 if bin == -13
replace dist = -230 if bin == -12
replace dist = -210 if bin == -11
replace dist = -190 if bin == -10
replace dist = -170 if bin == -9
replace dist = -150 if bin == -8
replace dist = -130 if bin == -7
replace dist = -110 if bin == -6
replace dist = -90 if bin == -5
replace dist = -70 if bin == -4
replace dist = -50 if bin == -3
replace dist = -30 if bin == -2
replace dist = -10 if bin == -1
replace dist = 10 if bin == 1
replace dist = 30 if bin == 2
replace dist = 50 if bin == 3
replace dist = 70 if bin == 4
replace dist = 90 if bin == 5
replace dist = 110 if bin == 6
replace dist = 130 if bin == 7
replace dist = 150 if bin == 8
replace dist = 170 if bin == 9
replace dist = 190 if bin == 10
replace dist = 210 if bin == 11
replace dist = 230 if bin == 12
replace dist = 250 if bin == 13
replace dist = 270 if bin == 14
replace dist = 290 if bin == 15

*Append back 
append using `t1'
replace extra=0 if extra==.
tab extra

*Choose bandwidth
global weather "temp prcp"
gen runvar = dist_real
replace runvar = -dist_real if heat_real == 0

rdbwselect PM10 runvar, c(0) p(1) kernel(tri) bwselect(mserd) covs($weather)
local h_pm10 = `e(h_mserd)'

rdbwselect pm25 runvar, c(0) p(1) kernel(tri) bwselect(mserd) covs($weather)
local h_pm25 = `e(h_mserd)'

*Keep samples within 300 km
keep if dist_real <= 300 | extra == 1

*Graph pm10
lpoly PM10 dist_real if heat_real==1, degree(1) kernel(tri) bwidth(`h_pm10') gen(pm10_bin_north pm10_lpoly_north)
lpoly PM10 dist_real if heat_real==0, degree(1) kernel(tri) bwidth(`h_pm10') gen(pm10_bin_south pm10_lpoly_south)
replace pm10_bin_south = -pm10_bin_south
format PM10 %9.0fc

sum pm10_bin_north
replace pm10_bin_north = 0 if pm10_bin_north < 12
sum pm10_bin_south
replace pm10_bin_south = 0 if pm10_bin_south > -14 & pm10_bin_south < 0

# delimit ;
	scatter PM10 dist if extra==1 & heat_real == 0, sort mcolor(blue) msize(medium) xlabel(-300(50)300, angle(45)) ylabel(60(20)180, nogrid)  
	|| scatter PM10 dist if extra == 1 & heat_real == 1, sort mcolor(green) msize(medium) msymbol(circle)
	|| line pm10_lpoly_north pm10_bin_north, lwidth(1) sort lcolor(gray)  
	|| line pm10_lpoly_south pm10_bin_south
	, xline(0,lcolor(black)) lwidth(1) sort lcolor(gray) 
	xtitle("Shortest distance to the boundary (km, positive if on north side)",size(medsmall)) 
	ytitle("Mean PM10 ({&mu}g/m{superscript:3})", size(medsmall)) 
	title("Monitor-level PM10 against distance to the alternative boundary", size(medsmall) color(black)) scheme(s1manual)
	legend(off rows(1) size(medium) order(1 "PM{sub:10} in South" 2 "PM{sub:10} in North" 3 "Local Linear Regression") symxsize(medium))
	;
# delimit cr

*Graph pm25
lpoly pm25 dist_real if heat_real==1, degree(1) kernel(tri) bwidth(`h_pm25') gen(pm25_bin_north pm25_lpoly_north)
lpoly pm25 dist_real if heat_real==0, degree(1) kernel(tri) bwidth(`h_pm25') gen(pm25_bin_south pm25_lpoly_south)
replace pm25_bin_south = -pm25_bin_south
format pm25 %9.0fc

sum pm25_bin_north
replace pm25_bin_north = 0 if pm25_bin_north < 12
sum pm25_bin_south
replace pm25_bin_south = 0 if pm25_bin_south > -14 & pm25_bin_south < 0

# delimit ;
	scatter pm25 dist if extra==1 & heat_real == 0, sort mcolor(blue) msize(medium) xlabel(-300(50)300, angle(45)) ylabel(40(20)100, nogrid)  
	|| scatter pm25 dist if extra == 1 & heat_real == 1, sort mcolor(green) msize(medium) msymbol(circle)
	|| line pm25_lpoly_north pm25_bin_north, lwidth(1) sort lcolor(gray) msymbol(circle) msize(large) 
	|| line pm25_lpoly_south pm25_bin_south
	, xline(0,lcolor(black)) lwidth(1) sort lcolor(gray) msize(large) msymbol(circle) 
	xtitle("Shortest distance to the boundary (km, positive if on north side)",size(medsmall)) 
	ytitle("Mean PM2.5 ({&mu}g/m{superscript:3})", size(medsmall)) 
	title("Monitor-level PM2.5 against distance to the alternative boundary", size(medsmall) color(black)) scheme(s1manual)
	legend(off rows(1) size(medium) order(1 "PM{sub:2.5} in South" 2 "PM{sub:2.5} in North" 3 "Local Linear Regression") symxsize(medium))
	;
# delimit cr
	
	
*Fig. 5
*Select ONE
use county_2013_2015, clear
*use county_2016_2018, clear

*Generate distance bins, every 20 km
gen bin = .
replace bin = 1 if dist_real>=0 & dist_real<20 & heat_real == 1
replace bin = 2 if dist_real>=20 & dist_real<40 & heat_real == 1
replace bin = 3 if dist_real>=40 & dist_real<60 & heat_real == 1
replace bin = 4 if dist_real>=60  & dist_real<80 & heat_real == 1
replace bin = 5 if dist_real>=80  & dist_real<100 & heat_real == 1
replace bin = 6 if dist_real>=100  & dist_real<120 & heat_real == 1
replace bin = 7 if dist_real>=120  & dist_real<140 & heat_real == 1
replace bin = 8 if dist_real>=140  & dist_real<160 & heat_real == 1
replace bin = 9 if dist_real>=160  & dist_real<180 & heat_real == 1
replace bin = 10 if dist_real>=180  & dist_real<200 & heat_real == 1
replace bin = 11 if dist_real>=200  & dist_real<220 & heat_real == 1
replace bin = 12 if dist_real>=220  & dist_real<240 & heat_real == 1
replace bin = 13 if dist_real>=240  & dist_real<260 & heat_real == 1
replace bin = 14 if dist_real>=260  & dist_real<280 & heat_real == 1
replace bin = 15 if dist_real>=280  & dist_real<300 & heat_real == 1
replace bin = -1 if dist_real>0 & dist_real<20 & heat_real == 0
replace bin = -2 if dist_real>=20 & dist_real<40 & heat_real == 0
replace bin = -3 if dist_real>=40 & dist_real<60 & heat_real == 0
replace bin = -4 if dist_real>=60  & dist_real<80 & heat_real == 0
replace bin = -5 if dist_real>=80  & dist_real<100 & heat_real == 0
replace bin = -6 if dist_real>=100  & dist_real<120 & heat_real == 0
replace bin = -7 if dist_real>=120  & dist_real<140 & heat_real == 0
replace bin = -8 if dist_real>=140  & dist_real<160 & heat_real == 0
replace bin = -9 if dist_real>=160  & dist_real<180 & heat_real == 0
replace bin = -10 if dist_real>=180  & dist_real<200 & heat_real == 0
replace bin = -11 if dist_real>=200  & dist_real<220 & heat_real == 0
replace bin = -12 if dist_real>=220  & dist_real<240 & heat_real == 0
replace bin = -13 if dist_real>=240  & dist_real<260 & heat_real == 0
replace bin = -14 if dist_real>=260  & dist_real<280 & heat_real == 0
replace bin = -15 if dist_real>=280  & dist_real<300 & heat_real == 0

tab bin

*For each distance bin, generate total population
bysort bin: egen totalpop_sum=sum(pop_total)
replace totalpop_sum = totalpop_sum/10^5

tempfile t1
save `t1', replace

*Collapse to distance bin 
collapse (mean) temp prcp PM10 pm25 heat_real, by(bin)
tsset bin
tsfill
drop if bin==0
drop if bin==.
gen extra=1 

gen dist=.
replace dist = -290 if bin == -15
replace dist = -270 if bin == -14
replace dist = -250 if bin == -13
replace dist = -230 if bin == -12
replace dist = -210 if bin == -11
replace dist = -190 if bin == -10
replace dist = -170 if bin == -9
replace dist = -150 if bin == -8
replace dist = -130 if bin == -7
replace dist = -110 if bin == -6
replace dist = -90 if bin == -5
replace dist = -70 if bin == -4
replace dist = -50 if bin == -3
replace dist = -30 if bin == -2
replace dist = -10 if bin == -1
replace dist = 10 if bin == 1
replace dist = 30 if bin == 2
replace dist = 50 if bin == 3
replace dist = 70 if bin == 4
replace dist = 90 if bin == 5
replace dist = 110 if bin == 6
replace dist = 130 if bin == 7
replace dist = 150 if bin == 8
replace dist = 170 if bin == 9
replace dist = 190 if bin == 10
replace dist = 210 if bin == 11
replace dist = 230 if bin == 12
replace dist = 250 if bin == 13
replace dist = 270 if bin == 14
replace dist = 290 if bin == 15

*Append back 
append using `t1'
replace extra=0 if extra==.
tab extra
count if totalpop_sum==.
bysort bin: egen mtotalpop=mean(totalpop_sum)

bys bin: sum mtotalpop

*Choose bandwidth
global weather "temp prcp"
gen runvar = dist_real
replace runvar = -dist_real if heat_real == 0

rdbwselect PM10 runvar, c(0) p(1) kernel(tri) bwselect(mserd) covs($weather) weights(pop_total)
local h_pm10 = `e(h_mserd)'

rdbwselect pm25 runvar, c(0) p(1) kernel(tri) bwselect(mserd) covs($weather) weights(pop_total)
local h_pm25 = `e(h_mserd)'

rdbwselect temp runvar, c(0) p(1) kernel(tri) bwselect(mserd) weights(pop_total)
local h_temp = `e(h_mserd)'

rdbwselect prcp runvar, c(0) p(1) kernel(tri) bwselect(mserd) weights(pop_total)
local h_prcp = `e(h_mserd)'

*Keep samples within 300 km
keep if dist_real <= 300 | extra == 1

*Graph temperature
lpoly temp dist_real if heat_real==1, degree(1) kernel(tri) bwidth(`h_temp') gen(temp_bin_north temp_lpoly_north)
lpoly temp dist_real if heat_real==0, degree(1) kernel(tri) bwidth(`h_temp') gen(temp_bin_south temp_lpoly_south)
replace temp_bin_south = -temp_bin_south

sum temp_bin_north
replace temp_bin_north = 0 if temp_bin_north < 8
sum temp_bin_south
replace temp_bin_south = 0 if temp_bin_south > -9 & temp_bin_south < 0

# delimit ;
	scatter temp dist [aw=mtotalpop] if extra==1 & heat_real == 0, sort mcolor(blue) msize(medium) mfcolor(white) xlabel(-300(50)300, angle(45)) ylabel(10(2)20, nogrid)  
	|| scatter temp dist [aw=mtotalpop] if extra==1 & heat_real == 1, sort mcolor(green) msize(medium) mfcolor(white) msymbol(circle)
	|| line temp_lpoly_north temp_bin_north, lwidth(1) sort lcolor(gray) msymbol(circle) msize(large) 
	|| line temp_lpoly_south temp_bin_south 
	, xline(0,lcolor(black)) lwidth(1) sort lcolor(gray) msize(large) msymbol(circle) 
	xtitle("Shortest distance to the boundary (km, positive if on north side)",size(medsmall)) 
	ytitle("Mean temperature (°C)", size(medsmall)) 
	title("Temperature against distance to the alternative boundary", size(medsmall) color(black)) scheme(s1manual)
	legend(off rows(1) size(medium) order(1 "Temp in South" 2 "Temp in North" 3 "Local Linear Regression") symxsize(medium))
	;
# delimit cr

*Graph precipitation
lpoly prcp dist_real if heat_real==1, degree(1) kernel(tri) bwidth(`h_prcp') gen(prcp_bin_north prcp_lpoly_north)
lpoly prcp dist_real if heat_real==0, degree(1) kernel(tri) bwidth(`h_prcp') gen(prcp_bin_south prcp_lpoly_south)
replace prcp_bin_south = -prcp_bin_south

sum prcp_bin_north
replace prcp_bin_north = 0 if prcp_bin_north < 8
sum prcp_bin_south
replace prcp_bin_south = 0 if prcp_bin_south > -9 & prcp_bin_south < 0

# delimit ;
	scatter prcp dist [aw=mtotalpop] if extra==1 & heat_real == 0, sort mcolor(blue) msize(medium) mfcolor(white) xlabel(-300(50)300, angle(45)) ylabel(0(0.05)0.25, nogrid) 
	|| scatter prcp dist [aw=mtotalpop] if extra==1 & heat_real==1, sort mcolor(green) msize(medium) mfcolor(white) msymbol(circle)
	|| line prcp_lpoly_north prcp_bin_north, lwidth(1) sort lcolor(gray) msymbol(circle) msize(large) 
	|| line prcp_lpoly_south prcp_bin_south 
	, xline(0,lcolor(black)) lwidth(1) sort lcolor(gray) msize(large) msymbol(circle) 
	xtitle("Shortest distance to the boundary (km, positive if on north side)",size(medsmall)) 
	ytitle("Mean precipitation (inches/day)", size(medsmall)) 
	title("Precipitation against distance to the alternative boundary", size(medsmall) color(black)) scheme(s1manual)
	legend(off rows(1) size(medium) order(1 "Precp in South" 2 "Precp in North" 3 "Local Linear Regression") symxsize(medium))
	;
# delimit cr


*Fig. 6
*Select ONE
use $tmp\monitor_2013_2015_winter, clear
*use $tmp\monitor_2013_2015_nonwinter, clear
*use $tmp\monitor_2016_2018_winter, clear
*use $tmp\monitor_2016_2018_nonwinter, clear

*Generate distance bins, every 20 km
gen bin = .
replace bin = 1 if dist_real>=0 & dist_real<20 & heat_real == 1
replace bin = 2 if dist_real>=20 & dist_real<40 & heat_real == 1
replace bin = 3 if dist_real>=40 & dist_real<60 & heat_real == 1
replace bin = 4 if dist_real>=60  & dist_real<80 & heat_real == 1
replace bin = 5 if dist_real>=80  & dist_real<100 & heat_real == 1
replace bin = 6 if dist_real>=100  & dist_real<120 & heat_real == 1
replace bin = 7 if dist_real>=120  & dist_real<140 & heat_real == 1
replace bin = 8 if dist_real>=140  & dist_real<160 & heat_real == 1
replace bin = 9 if dist_real>=160  & dist_real<180 & heat_real == 1
replace bin = 10 if dist_real>=180  & dist_real<200 & heat_real == 1
replace bin = 11 if dist_real>=200  & dist_real<220 & heat_real == 1
replace bin = 12 if dist_real>=220  & dist_real<240 & heat_real == 1
replace bin = 13 if dist_real>=240  & dist_real<260 & heat_real == 1
replace bin = 14 if dist_real>=260  & dist_real<280 & heat_real == 1
replace bin = 15 if dist_real>=280  & dist_real<300 & heat_real == 1
replace bin = -1 if dist_real>0 & dist_real<20 & heat_real == 0
replace bin = -2 if dist_real>=20 & dist_real<40 & heat_real == 0
replace bin = -3 if dist_real>=40 & dist_real<60 & heat_real == 0
replace bin = -4 if dist_real>=60  & dist_real<80 & heat_real == 0
replace bin = -5 if dist_real>=80  & dist_real<100 & heat_real == 0
replace bin = -6 if dist_real>=100  & dist_real<120 & heat_real == 0
replace bin = -7 if dist_real>=120  & dist_real<140 & heat_real == 0
replace bin = -8 if dist_real>=140  & dist_real<160 & heat_real == 0
replace bin = -9 if dist_real>=160  & dist_real<180 & heat_real == 0
replace bin = -10 if dist_real>=180  & dist_real<200 & heat_real == 0
replace bin = -11 if dist_real>=200  & dist_real<220 & heat_real == 0
replace bin = -12 if dist_real>=220  & dist_real<240 & heat_real == 0
replace bin = -13 if dist_real>=240  & dist_real<260 & heat_real == 0
replace bin = -14 if dist_real>=260  & dist_real<280 & heat_real == 0
replace bin = -15 if dist_real>=280  & dist_real<300 & heat_real == 0

tab bin

tempfile t1
save `t1', replace

*Collapse to distance bin 
collapse (mean) PM10 pm25 heat_real, by(bin)
tsset bin
tsfill
drop if bin==0 
drop if bin==.
gen extra=1 

gen dist=.
replace dist = -290 if bin == -15
replace dist = -270 if bin == -14
replace dist = -250 if bin == -13
replace dist = -230 if bin == -12
replace dist = -210 if bin == -11
replace dist = -190 if bin == -10
replace dist = -170 if bin == -9
replace dist = -150 if bin == -8
replace dist = -130 if bin == -7
replace dist = -110 if bin == -6
replace dist = -90 if bin == -5
replace dist = -70 if bin == -4
replace dist = -50 if bin == -3
replace dist = -30 if bin == -2
replace dist = -10 if bin == -1
replace dist = 10 if bin == 1
replace dist = 30 if bin == 2
replace dist = 50 if bin == 3
replace dist = 70 if bin == 4
replace dist = 90 if bin == 5
replace dist = 110 if bin == 6
replace dist = 130 if bin == 7
replace dist = 150 if bin == 8
replace dist = 170 if bin == 9
replace dist = 190 if bin == 10
replace dist = 210 if bin == 11
replace dist = 230 if bin == 12
replace dist = 250 if bin == 13
replace dist = 270 if bin == 14
replace dist = 290 if bin == 15

*Append back 
append using `t1'
replace extra=0 if extra==.
tab extra

*Choose bandwidth
global weather "temp prcp"
gen runvar = dist_real
replace runvar = -dist_real if heat_real == 0

rdbwselect PM10 runvar, c(0) p(1) kernel(tri) bwselect(mserd) covs($weather)
local h_pm10 = `e(h_mserd)'

rdbwselect pm25 runvar, c(0) p(1) kernel(tri) bwselect(mserd) covs($weather)
local h_pm25 = `e(h_mserd)'

*Keep samples within 300 km
keep if dist_real <= 300 | extra == 1

*Graph pm10
lpoly PM10 dist_real if heat_real==1, degree(1) kernel(tri) bwidth(`h_pm10') gen(pm10_bin_north pm10_lpoly_north)
lpoly PM10 dist_real if heat_real==0, degree(1) kernel(tri) bwidth(`h_pm10') gen(pm10_bin_south pm10_lpoly_south)
replace pm10_bin_south = -pm10_bin_south
format PM10 %9.0fc

sum pm10_bin_north
replace pm10_bin_north = 0 if pm10_bin_north < 12
sum pm10_bin_south
replace pm10_bin_south = 0 if pm10_bin_south > -14 & pm10_bin_south < 0

# delimit ;
	scatter PM10 dist if extra==1 & heat_real == 0, sort mcolor(blue) msize(medium) xlabel(-300(50)300, angle(45)) ylabel(20(20)180, nogrid)  
	|| scatter PM10 dist if extra == 1 & heat_real == 1, sort mcolor(green) msize(medium) msymbol(circle)
	|| line pm10_lpoly_north pm10_bin_north, lwidth(1) sort lcolor(gray)  
	|| line pm10_lpoly_south pm10_bin_south
	, xline(0,lcolor(black)) lwidth(1) sort lcolor(gray) 
	xtitle("Shortest distance to the boundary (km, positive if on north side)",size(medsmall)) 
	ytitle("Mean PM10 ({&mu}g/m{superscript:3})", size(medsmall)) 
	title("Monitor-level PM10 against distance to the alternative boundary", size(medsmall) color(black)) scheme(s1manual)
	legend(off rows(1) size(medium) order(1 "PM{sub:10} in South" 2 "PM{sub:10} in North" 3 "Local Linear Regression") symxsize(medium))
	;
# delimit cr

*Graph pm25
lpoly pm25 dist_real if heat_real==1, degree(1) kernel(tri) bwidth(`h_pm25') gen(pm25_bin_north pm25_lpoly_north)
lpoly pm25 dist_real if heat_real==0, degree(1) kernel(tri) bwidth(`h_pm25') gen(pm25_bin_south pm25_lpoly_south)

replace pm25_bin_south = -pm25_bin_south
format pm25 %9.0fc

sum pm25_bin_north
replace pm25_bin_north = 0 if pm25_bin_north < 12
sum pm25_bin_south
replace pm25_bin_south = 0 if pm25_bin_south > -14 & pm25_bin_south < 0

# delimit ;
	scatter pm25 dist if extra==1 & heat_real == 0, sort mcolor(blue) msize(medium) xlabel(-300(50)300, angle(45)) ylabel(20(20)120, nogrid)  
	|| scatter pm25 dist if extra == 1 & heat_real == 1, sort mcolor(green) msize(medium) msymbol(circle)
	|| line pm25_lpoly_north pm25_bin_north, lwidth(1) sort lcolor(gray) msymbol(circle) msize(large) 
	|| line pm25_lpoly_south pm25_bin_south
	, xline(0,lcolor(black)) lwidth(1) sort lcolor(gray) msize(large) msymbol(circle) 
	xtitle("Shortest distance to the boundary (km, positive if on north side)",size(medsmall)) 
	ytitle("Mean PM2.5 ({&mu}g/m{superscript:3})", size(medsmall)) 
	title("Monitor-level PM2.5 against distance to the alternative boundary", size(medsmall) color(black)) scheme(s1manual)
	legend(off rows(1) size(medium) order(1 "PM{sub:2.5} in South" 2 "PM{sub:2.5} in North" 3 "Local Linear Regression") symxsize(medium))
	;
# delimit cr